# SETWD ===========================
rm(list=ls())


# LIBRARIES ===========================

library(tidyverse)
library(foreign)
library(stargazer)
library(lmtest)
library(multiwayvcov)

# IMPORT DATA ===========================

dfgi <- read.csv("data/sve-08_04_24.csv",stringsAsFactors = F)


## FIG 3. Coefficients Relating Distinct Measures of Inequality to the Risk of Erosion ----

vars <- c("gini_disp","wiid.gini",
          "wb.gini","wid.gini",
          "pre.wid.gini",
          "inc.top1","inc.top10","inc.bot50",
          "wealth.top1","wealth.top10","wealth.bot50")

beta <- rep(NA,length(vars))
se <- rep(NA,length(vars))
obs <- rep(NA,length(vars))
beta2 <- rep(NA,length(vars))
se2 <- rep(NA,length(vars))
obs2 <- rep(NA,length(vars))


for (i in 1:length(vars)) {
  g <- vars[i]
  temp <- dfgi %>%
    filter(!is.na(!!sym(g)))%>%
    mutate(x=!!sym(g))
  m1 <- glm(erosion.strict ~ x, data = temp, family = binomial)
  crse <- coeftest(m1, cluster.vcov(m1, temp$country.name))
  beta[i] <- crse[2,1]
  se[i] <- crse[2,2]
  obs[i] <- nobs(crse)
  
  m1 <- glm(erosion.strict ~ log(gdppc) + year + x,data=temp, family=binomial)
  crse <- coeftest(m1, cluster.vcov(m1, temp$country.name))
  beta2[i] <- crse[4,1]
  se2[i] <- crse[4,2]
  obs2[i] <- nobs(crse)
}

res <- data.frame(vars,beta,se,obs,beta2,se2,obs2)

res2 <- res%>%
  mutate(var=vars,var.num=row_number())%>%
  mutate(var=recode(var,"gini_disp"="Gini\nSWIID",
                    "inc.top1"="Top 1% Income Share\nWID",
                    "inc.top10"="Top 10% Income Share\nWID",
                    "inc.bot50"="Bottom 50% Income Share\nWID",
                    "wealth.top1"="Top 1% Wealth Share\nWID",
                    "wealth.top10"="Top 10% Wealth Share\nWID",
                    "wealth.bot50"="Bottom 50% Wealth Share\nWID",
                    "pre.wid.gini"="Pre-fisc Gini\nWID",
                    "wid.gini"="Gini\nWID",
                    "wiid.gini"="Gini\nWIID",
                    "wb.gini"="Gini\nWorld Bank"))%>%
  mutate(var.lab=paste(var," (n=",obs,")",sep=""))%>%
  mutate(vf=as.factor(var.lab))%>%
  mutate(vf=fct_reorder(vf, -var.num))%>%
  mutate(sig=(beta-1.96*se>0)|(beta+1.96*se<0),
         sig2=(beta2-1.96*se2>0)|(beta2+1.96*se2<0))


ggplot(res2[6:11,],aes(x=beta,y=vf))+
  geom_vline(xintercept=0,color="gray60")+
  geom_errorbarh(aes(xmin=beta-1.96*se,xmax=beta+1.96*se),height=0,
                 position = position_nudge(y = 0.1))+
  geom_point(position = position_nudge(y = 0.1),aes(shape=sig))+
  geom_errorbarh(aes(xmin=beta2-1.96*se2,xmax=beta2+1.96*se2),height=0,
                 color="gray70",position = position_nudge(y = -0.1))+
  geom_point(aes(x=beta2,shape=sig2),fill="white",color="gray70",position = position_nudge(y = -0.1))+
  ylab("")+
  xlab("Estimated Coefficient")+
  scale_shape_manual(values=c(21,19))+
  theme_bw()+
  theme(panel.grid = element_blank(),legend.position = "none")

ggsave("figures/fig-3-new.png",width=4.25,height=3.25)


## Bottom 50% Income ----
temp <- dfgi %>%
  filter(!is.na(inc.bot50),!is.na(gdppc))

m1 <- glm(erosion.strict ~ inc.bot50 + log(gdppc) + year, data=temp, family=binomial)
crse1 <- coeftest(m1, cluster.vcov(m1, temp$country.name))
# Bottom 50% income p-value: 0.051:
crse1[2,4]

